# 用lxml.etree模块
from lxml import etree

# Two ways to build an element tree:
#   (1) local file:          etree.parse('file')
#   (2) server response text: etree.HTML(response_text)
# NOTE: etree.parse() uses lxml's XML parser unless a parser is passed
# explicitly, so 222.html has to be well-formed (e.g. self-closed tags).
tree = etree.parse('222.html')

# Nodes are extracted with tree.xpath(path).
# Path syntax: `//` matches descendants at any depth, `/` matches direct
# children only.
_LIST_QUERIES = (
    # Four ways of reaching <li> elements, from most to least specific.
    '//body/ul/li',
    '//body//li',
    '//ul/li',
    '//li',
    # Appending /text() selects the text content instead of the elements.
    '//body/ul/li/text()',
    # Attribute predicate: tag[@attr] keeps only tags that have the attribute.
    '//ul/li[@id]/text()',
    # tag[@attr="value"] keeps only tags whose attribute equals the value.
    # Quotes must nest: single outside / double inside, or the reverse.
    '//ul/li[@id="b"]/text()',
)

for _query in _LIST_QUERIES:
    doc = tree.xpath(_query)
    print(doc)

# Read the class attribute of the <li> whose id is "b". xpath() always
# returns a list, so the first item is taken by index; an empty result is
# guarded against so a missing element yields None instead of IndexError.
_class_values = tree.xpath('//ul/li[@id="b"]/@class')
doc = _class_values[0] if _class_values else None
print(doc)